#Mac
knitr::opts_knit$set(root.dir = "~/Dropbox/R backup/SDCT - R")
#Windows
#knitr::opts_knit$set(root.dir = "C:/Users/rowe0122/Dropbox/R backup/SDCT - R")
library(knitr)
Load data
load(file="Baseline.Rdata")
load(file="SDCTCOW.Rdata")
load(file="SDCTCOWDHI.Rdata")
SDCTCOW = SDCTCOW %>%
mutate(Tx = recode(SDCTCOW$Tx,
"0" = "Blanket", "1" = "Culture", "2" = "Algorithm"))
Import file BL (cows included for analysis) for descriptive statistics
library(readr)
BL <- read_csv("BL.csv", col_types = cols(X1 = col_skip()))
Missing column names filled in: 'X1' [1]
head(BL)
print(summarytools::dfSummary(BL, valid.col=FALSE, graph.magnif=0.8, style="grid"), method = "render")
NAs introduced by coercion
| No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | ||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | Site [character] | 1. CA 2. IA 3. MN 4. NY |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 2 | FARMID [numeric] | Mean (sd) : 4 (1.5) min < med < max: 1 < 4 < 7 IQR (CV) : 2 (0.4) |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 3 | Tx [numeric] | Mean (sd) : 1 (0.8) min < med < max: 0 < 1 < 2 IQR (CV) : 2 (0.8) |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 4 | Age [numeric] | Mean (sd) : 47 (15.4) min < med < max: 30.4 < 44.5 < 122.3 IQR (CV) : 22.6 (0.3) | 682 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 5 | Parity [numeric] | Mean (sd) : 1.9 (0.8) min < med < max: 1 < 2 < 3 IQR (CV) : 2 (0.4) |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 6 | DOSCC [numeric] | Mean (sd) : 4.4 (1.2) min < med < max: 1.6 < 4.3 < 8.6 IQR (CV) : 1.6 (0.3) | 401 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 7 | DODIM [numeric] | Mean (sd) : 325 (45.8) min < med < max: 252 < 306 < 584 IQR (CV) : 49.5 (0.1) | 179 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 8 | PrevCM [numeric] | Min : 0 Mean : 0.1 Max : 1 |
|
0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 9 | PrevSCCHI [numeric] | Mean (sd) : 5.4 (1.3) min < med < max: 2.9 < 5.3 < 9.2 IQR (CV) : 1.8 (0.2) | 629 distinct values | 0 (0%) | |||||||||||||||||||||||||||||||||||||||||||
| 10 | DPlength [numeric] | Mean (sd) : 55.7 (7.9) min < med < max: 30 < 56 < 87 IQR (CV) : 9 (0.1) | 54 distinct values | 0 (0%) |
Generated by summarytools 0.9.3 (R version 3.6.0)
2019-07-02
Comparison of demographics for each treatment group at dry-off
library(table1)
table1(~ Age + DOMY + DOSCC + PrevSCCHI + factor(PrevCM) + factor(Parity) | Tx, data=Baseline)
| 0 (n=1568) |
1 (n=1592) |
2 (n=1544) |
Overall (n=4704) |
|
|---|---|---|---|---|
| Age | ||||
| Mean (SD) | 47.2 (14.6) | 46.9 (15.9) | 46.1 (15.0) | 46.7 (15.2) |
| Median [Min, Max] | 44.7 [30.8, 104] | 44.1 [30.4, 111] | 44.5 [30.4, 122] | 44.4 [30.4, 122] |
| DOMY | ||||
| Mean (SD) | 26.7 (8.87) | 27.9 (8.81) | 27.3 (8.38) | 27.3 (8.70) |
| Median [Min, Max] | 27.7 [4.54, 49.4] | 29.0 [1.81, 49.4] | 29.0 [2.72, 49.4] | 28.6 [1.81, 49.4] |
| DOSCC | ||||
| Mean (SD) | 4.45 (1.21) | 4.38 (1.23) | 4.45 (1.22) | 4.43 (1.22) |
| Median [Min, Max] | 4.33 [1.61, 8.35] | 4.23 [1.61, 8.59] | 4.39 [1.61, 8.25] | 4.32 [1.61, 8.59] |
| PrevSCCHI | ||||
| Mean (SD) | 5.37 (1.30) | 5.49 (1.23) | 5.40 (1.29) | 5.42 (1.28) |
| Median [Min, Max] | 5.23 [2.87, 9.21] | 5.37 [3.14, 9.21] | 5.30 [2.94, 9.21] | 5.30 [2.87, 9.21] |
| factor(PrevCM) | ||||
| 0 | 1344 (85.7%) | 1360 (85.4%) | 1352 (87.6%) | 4056 (86.2%) |
| 1 | 224 (14.3%) | 232 (14.6%) | 192 (12.4%) | 648 (13.8%) |
| factor(Parity) | ||||
| 1 | 616 (39.3%) | 732 (46.0%) | 660 (42.7%) | 2008 (42.7%) |
| 2 | 516 (32.9%) | 416 (26.1%) | 488 (31.6%) | 1420 (30.2%) |
| 3 | 436 (27.8%) | 444 (27.9%) | 396 (25.6%) | 1276 (27.1%) |
Note that this table excludes cows (n=32) that were not included cow-level analysis (eg. cows with long or short dry periods).
table1(~ Age + DOMY + DPlength + DOSCC + PrevSCCHI + PrevCM + Parity | Tx, data=SDCTCOW)
| Blanket (n=407) |
Culture (n=410) |
Algorithm (n=394) |
Overall (n=1211) |
|
|---|---|---|---|---|
| Age | ||||
| Mean (SD) | 47.5 (15.0) | 47.2 (16.2) | 46.3 (15.1) | 47.0 (15.4) |
| Median [Min, Max] | 44.8 [30.8, 114] | 44.4 [30.4, 119] | 44.6 [30.4, 122] | 44.5 [30.4, 122] |
| DOMY | ||||
| Mean (SD) | 26.8 (8.88) | 28.0 (8.89) | 27.3 (8.38) | 27.4 (8.73) |
| Median [Min, Max] | 27.7 [4.54, 49.4] | 29.0 [1.81, 49.4] | 29.0 [2.72, 49.4] | 29.0 [1.81, 49.4] |
| DPlength | ||||
| Mean (SD) | 55.3 (7.89) | 55.9 (7.61) | 55.9 (8.16) | 55.7 (7.88) |
| Median [Min, Max] | 56.0 [33.0, 84.0] | 56.0 [33.0, 84.0] | 56.0 [30.0, 87.0] | 56.0 [30.0, 87.0] |
| DOSCC | ||||
| Mean (SD) | 4.46 (1.22) | 4.39 (1.22) | 4.45 (1.21) | 4.43 (1.22) |
| Median [Min, Max] | 4.36 [1.61, 8.35] | 4.23 [1.61, 8.59] | 4.39 [1.61, 8.25] | 4.33 [1.61, 8.59] |
| PrevSCCHI | ||||
| Mean (SD) | 5.40 (1.32) | 5.50 (1.22) | 5.40 (1.29) | 5.43 (1.28) |
| Median [Min, Max] | 5.23 [2.87, 9.21] | 5.37 [3.14, 9.21] | 5.30 [2.94, 9.21] | 5.30 [2.87, 9.21] |
| PrevCM | ||||
| 0 | 350 (86.0%) | 351 (85.6%) | 345 (87.6%) | 1046 (86.4%) |
| 1 | 57 (14.0%) | 59 (14.4%) | 49 (12.4%) | 165 (13.6%) |
| Parity | ||||
| 1 | 158 (38.8%) | 185 (45.1%) | 167 (42.4%) | 510 (42.1%) |
| 2 | 133 (32.7%) | 108 (26.3%) | 123 (31.2%) | 364 (30.1%) |
| 3 | 116 (28.5%) | 117 (28.5%) | 104 (26.4%) | 337 (27.8%) |
Groups appear to be well balanced before and after exclusions, indicating that randomization was successful, and that drop-out is unlikely to affect our assumption of exchangeability between treatment groups.
Comparison of herds
library(table1)
table1(~ Tx + Age + DOMY + DPlength + DODIM + DOSCC + PrevSCCHI + PrevCM + Parity | FARMID, data=SDCTCOW)
| 1 (n=76) |
2 (n=82) |
3 (n=263) |
4 (n=388) |
5 (n=242) |
6 (n=47) |
7 (n=113) |
Overall (n=1211) |
|
|---|---|---|---|---|---|---|---|---|
| Tx | ||||||||
| Blanket | 28 (36.8%) | 27 (32.9%) | 93 (35.4%) | 122 (31.4%) | 81 (33.5%) | 16 (34.0%) | 40 (35.4%) | 407 (33.6%) |
| Culture | 27 (35.5%) | 24 (29.3%) | 87 (33.1%) | 136 (35.1%) | 84 (34.7%) | 16 (34.0%) | 36 (31.9%) | 410 (33.9%) |
| Algorithm | 21 (27.6%) | 31 (37.8%) | 83 (31.6%) | 130 (33.5%) | 77 (31.8%) | 15 (31.9%) | 37 (32.7%) | 394 (32.5%) |
| Age | ||||||||
| Mean (SD) | 48.7 (16.0) | 46.8 (14.3) | 46.8 (13.4) | 47.5 (16.7) | 47.0 (15.9) | 42.9 (11.2) | 46.6 (16.2) | 47.0 (15.4) |
| Median [Min, Max] | 45.2 [30.6, 90.2] | 44.6 [30.7, 95.7] | 44.8 [30.4, 95.5] | 44.6 [31.7, 122] | 44.7 [30.4, 115] | 43.7 [31.0, 82.6] | 44.6 [31.0, 114] | 44.5 [30.4, 122] |
| DOMY | ||||||||
| Mean (SD) | 23.0 (6.30) | 26.6 (9.64) | 29.5 (6.91) | 30.3 (7.84) | 19.6 (8.55) | 30.7 (4.81) | 30.6 (6.33) | 27.4 (8.73) |
| Median [Min, Max] | 22.7 [9.07, 38.1] | 28.6 [2.72, 44.0] | 30.4 [8.62, 47.6] | 30.4 [2.72, 49.4] | 20.0 [1.81, 39.9] | 30.8 [16.8, 40.8] | 30.8 [15.4, 48.5] | 29.0 [1.81, 49.4] |
| DPlength | ||||||||
| Mean (SD) | 53.2 (11.8) | 52.5 (5.63) | 57.8 (6.39) | 55.6 (6.49) | 55.7 (8.16) | 60.3 (7.19) | 53.0 (10.6) | 55.7 (7.88) |
| Median [Min, Max] | 52.0 [32.0, 85.0] | 53.0 [30.0, 63.0] | 58.0 [35.0, 87.0] | 56.0 [35.0, 74.0] | 55.0 [35.0, 85.0] | 60.0 [47.0, 84.0] | 55.0 [33.0, 77.0] | 56.0 [30.0, 87.0] |
| DODIM | ||||||||
| Mean (SD) | 346 (51.4) | 334 (52.7) | 314 (37.9) | 325 (44.4) | 329 (51.2) | 328 (43.4) | 321 (39.1) | 325 (45.8) |
| Median [Min, Max] | 326 [292, 584] | 304 [293, 512] | 293 [266, 475] | 296 [283, 468] | 308 [252, 512] | 314 [258, 444] | 307 [272, 476] | 306 [252, 584] |
| DOSCC | ||||||||
| Mean (SD) | 4.54 (1.04) | 4.08 (1.15) | 4.00 (0.907) | 4.47 (1.16) | 5.18 (1.39) | 4.34 (0.966) | 3.91 (1.09) | 4.43 (1.22) |
| Median [Min, Max] | 4.61 [2.56, 7.59] | 3.96 [2.30, 8.17] | 3.98 [2.60, 6.68] | 4.48 [1.61, 8.59] | 5.13 [1.61, 8.35] | 4.47 [2.56, 6.61] | 3.78 [2.56, 7.38] | 4.33 [1.61, 8.59] |
| PrevSCCHI | ||||||||
| Mean (SD) | 5.48 (1.31) | 5.06 (1.31) | 4.77 (1.08) | 5.77 (1.20) | 6.13 (1.11) | 5.02 (0.927) | 4.73 (1.17) | 5.43 (1.28) |
| Median [Min, Max] | 5.31 [3.43, 9.21] | 4.78 [3.33, 9.21] | 4.61 [2.87, 8.21] | 5.65 [3.00, 9.21] | 6.07 [3.40, 9.21] | 5.09 [3.09, 7.06] | 4.53 [2.94, 8.00] | 5.30 [2.87, 9.21] |
| PrevCM | ||||||||
| 0 | 62 (81.6%) | 81 (98.8%) | 228 (86.7%) | 337 (86.9%) | 191 (78.9%) | 46 (97.9%) | 101 (89.4%) | 1046 (86.4%) |
| 1 | 14 (18.4%) | 1 (1.2%) | 35 (13.3%) | 51 (13.1%) | 51 (21.1%) | 1 (2.1%) | 12 (10.6%) | 165 (13.6%) |
| Parity | ||||||||
| 1 | 31 (40.8%) | 33 (40.2%) | 92 (35.0%) | 180 (46.4%) | 104 (43.0%) | 23 (48.9%) | 47 (41.6%) | 510 (42.1%) |
| 2 | 15 (19.7%) | 28 (34.1%) | 95 (36.1%) | 101 (26.0%) | 73 (30.2%) | 17 (36.2%) | 35 (31.0%) | 364 (30.1%) |
| 3 | 30 (39.5%) | 21 (25.6%) | 76 (28.9%) | 107 (27.6%) | 65 (26.9%) | 7 (14.9%) | 31 (27.4%) | 337 (27.8%) |
Kaplan Meier curve
library(ggplot2)
library(survminer)
KM <- survfit(Surv(CMTAR, CM) ~ Tx, data = SDCTCOW)
knitr::opts_chunk$set(fig.width = 800, fig.height = 900)
ggsurvplot(KM, data = SDCTCOW, title = "", pval = T, conf.int = F,risk.table.col = "Tx",risk.table = T, risk.table.y.text.col = TRUE , surv.plot.height = 5, legend.labs = c("Blanket","Culture","Algorithm"), tables.theme = theme_cleantable(), ggtheme = theme_bw())
Log-Rank test
survdiff(Surv(CMTAR, CM) ~ Tx,data=SDCTCOW,rho=0)
Call:
survdiff(formula = Surv(CMTAR, CM) ~ Tx, data = SDCTCOW, rho = 0)
N Observed Expected (O-E)^2/E (O-E)^2/V
Tx=Blanket 407 59 52 0.954 1.428
Tx=Culture 410 50 54 0.298 0.454
Tx=Algorithm 394 48 51 0.180 0.267
Chisq= 1.4 on 2 degrees of freedom, p= 0.5
Model type: Cox proportional hazards analysis with farm included as a cluster variable (robust sandwich standard error estimator) to account for lack of indepedence.
Step 1: Identify potential confouders using a directed acyclic graph (DAG)
Step 2: Identify correlated variables using pearson and kendalls correlation coefficients
Step 3: Create model with all potential confounders
Step 4: Investigate if covariates meet proportional hazards assumption
Step 5: Investigate potential effect measure modification
Step 6: Remove unneccesary covariates in backwards stepwise fashion using 10% rule (i.e. if hazard ratio for algorithm or culture changes by >10% after removing the covariate, the covariate is retained in the model)
Step 7: Report final model
This is used to identify variables that could be confounders if they are not balanced between treatment groups.
library(DiagrammeR)
mermaid("graph LR
T(Treatment)-->U(Clinical mastitis)
A(Age)-->T
P(Parity)-->T
M(Yield at dry-off)-->T
S(SCC during prev lactation)-->T
C(CM in prev lact)-->T
A-->U
P-->U
M-->U
S-->U
C-->U
C-->M
P-->C
P-->S
P-->M
A-->P
A-->C
A-->S
A-->M
M-->S
C-->S
style A fill:#FFFFFF, stroke-width:0px
style T fill:#FFFFFF, stroke-width:2px
style P fill:#FFFFFF, stroke-width:0px
style M fill:#FFFFFF, stroke-width:0px
style S fill:#FFFFFF, stroke-width:0px
style C fill:#FFFFFF, stroke-width:0px
style I fill:#FFFFFF, stroke-width:0px
style U fill:#FFFFFF, stroke-width:2px
")
According to this DAG, I may need to control for the following variables.
Parity [“Parity”]
Age [“Age”]
Yield at most recent test before dry off [“DOMY”]
Somatic cell count at last herd test during previous lactation [“DOSCC” or “PrevSCCHi”] <- likely to be correlated
Clinical mastitis in previous lactation [“PrevCM”]
Cox proportional hazards regression for clinical mastitis (1-120 DIM)
Cows that did not calve or that had long/short dry periods are excluded. Cows with clinical mastitis prior to calving are included.
Reasons for R censor = 120 DIM or culling/death
library(broom)
library(survival)
SR <- coxph(Surv(CMTAR, CM) ~ Parity + DOMY + DOSCC + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
coxph(Surv(CMTAR, CM) ~ Parity + DOMY + DOSCC + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
Call:
coxph(formula = Surv(CMTAR, CM) ~ Parity + DOMY + DOSCC + PrevCM +
Tx + cluster(FARMID), data = SDCTCOW)
coef exp(coef) se(coef) robust se z p
Parity2 0.262972 1.300790 0.198272 0.127747 2.059 0.0395
Parity3 0.328493 1.388873 0.204918 0.292867 1.122 0.2620
DOMY -0.006115 0.993904 0.010031 0.018794 -0.325 0.7449
DOSCC 0.028361 1.028767 0.076123 0.105980 0.268 0.7890
PrevCM1 0.357508 1.429761 0.212265 0.228486 1.565 0.1177
TxCulture -0.193527 0.824048 0.192911 0.177916 -1.088 0.2767
TxAlgorithm -0.173991 0.840304 0.194505 0.162762 -1.069 0.2851
Likelihood ratio test=10.21 on 7 df, p=0.1771
n= 1211, number of events= 157
#tidy(coxph(Surv(CMTAR, CM) ~ Tx + cluster(FARMID), data=SDCTCOW))
library(broom)
library(survival)
SR <- coxph(Surv(CMTAR, CM) ~ Parity + DOMY + DOSCC + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
coxph(Surv(CMTAR, CM) ~ Parity + DOMY + DOSCC + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
Call:
coxph(formula = Surv(CMTAR, CM) ~ Parity + DOMY + DOSCC + PrevCM +
Tx + cluster(FARMID), data = SDCTCOW)
coef exp(coef) se(coef) robust se z p
Parity2 0.262972 1.300790 0.198272 0.127747 2.059 0.0395
Parity3 0.328493 1.388873 0.204918 0.292867 1.122 0.2620
DOMY -0.006115 0.993904 0.010031 0.018794 -0.325 0.7449
DOSCC 0.028361 1.028767 0.076123 0.105980 0.268 0.7890
PrevCM1 0.357508 1.429761 0.212265 0.228486 1.565 0.1177
TxCulture -0.193527 0.824048 0.192911 0.177916 -1.088 0.2767
TxAlgorithm -0.173991 0.840304 0.194505 0.162762 -1.069 0.2851
Likelihood ratio test=10.21 on 7 df, p=0.1771
n= 1211, number of events= 157
#tidy(coxph(Surv(CMTAR, CM) ~ Tx + cluster(FARMID), data=SDCTCOW))
Parity, Tx and DOSCC violate the assumption of proportional hazards
SR <- cox.zph(SR)
SR
rho chisq p
Parity2 -0.1172 4.313 0.037831
Parity3 -0.1245 11.662 0.000638
DOMY 0.0370 1.843 0.174616
DOSCC 0.1043 5.489 0.019134
PrevCM1 0.0207 0.394 0.530189
TxCulture 0.1564 9.946 0.001612
TxAlgorithm 0.0207 0.480 0.488505
GLOBAL NA 14.007 0.051053
Schoenfield residual plots to assess assumption of proportional hazards.
ggcoxzph(SR,var = c("TxCulture","TxAlgorithm"))
ggcoxzph(SR,var = c("DOSCC", "DOMY"))
ggcoxzph(SR,var = c("Parity2","Parity3"))
ggcoxzph(SR,var = c("PrevCM1"))
I will deal with these covariates using stratification (i.e. fitting seperate baseline hazards)
I must create categorical variables to allow for this (not shown here).
Dry off milk yield [“DOMY”] -> median split (“DOMYcat” = 0 / 1).
Dry off SCC - split at 200,000 cells (subclinical mastitis [“DOSCM”] = 0 / 1)
Show new variables
SDCTCOWcheck <- SDCTCOW %>% select(Tx,CM,CMTAR,Cull2,Cull2TAR,DOSCC,DOSCM,DOMY,DOMYcat,Parity,PrevCM)
head(SDCTCOWcheck)
Run new cox model with stratified variables
SR <- coxph(Surv(CMTAR, CM) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
tidy(coxph(Surv(CMTAR, CM) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW))
Repeat Schoenfeld residuals
Tx no longer p < 0.05, but is close to the threshold (p = 0.08).
SR <- cox.zph(SR)
SR
rho chisq p
PrevCM1 0.0273 0.185 0.6674
TxCulture 0.1393 3.094 0.0786
TxAlgorithm 0.0442 0.419 0.5173
GLOBAL NA 3.320 0.3449
Will split time into 1-60 and 61-120 DIM to see if this difference is meaningful.
Cox model 1-60 DIM
Beta coefficient for Culture = -0.43, Algorithm = -0.08
SR <- coxph(Surv(CMTAR60, CM60) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
tidy(coxph(Surv(CMTAR60, CM60) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW))
Cox model 61-120 DIM Beta coefficient for Culture = 0.008, Algorithm = -.401
SR <- coxph(Surv(CMTAR60120, CM60120) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
tidy(coxph(Surv(CMTAR60120, CM60120) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW))
Decision: These differences aren’t sufficient in my opinion to warrant pursuing the 1-60, 61-120 DIM models. An average of the two is appropriate.
Unable to evluate interaction between Tx & FARMID using Cox regression - Bizzare SE’s, won’t converge.
mm0 <- coxph(Surv(CMTAR, CM) ~ Tx + FARMID + Tx:FARMID + PrevCM + strata(Parity) + strata(DOMYcat) + strata(DOSCM), data=SDCTCOW)
Loglik converged before variable 19 ; coefficient may be infinite.
tidy(mm0)
Will use logistic regression to assess for interaction between Tx and farm: P > 0.05.
mm0 <- glm(CM ~ Tx*FARMID + PrevCM + Parity + DOMYcat + DOSCM, data=SDCTCOW, family=binomial)
car::Anova(mm0)
Analysis of Deviance Table (Type II tests)
Response: CM
LR Chisq Df Pr(>Chisq)
Tx 0.881 2 0.6437
FARMID 35.339 6 3.704e-06 ***
PrevCM 0.763 1 0.3824
Parity 1.923 2 0.3823
DOMYcat 1.466 1 0.2260
DOSCM 0.757 1 0.3843
Tx:FARMID 12.592 12 0.3994
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Decision: No treatment by herd effect measure modification
Assess effect measure modification with previous clinical mastitis using Cox regression.
a <- coxph(Surv(CMTAR, CM) ~ factor(Tx)*factor(PrevCM) + strata(Parity) + cluster(FARMID) + strata(DOMYcat) + strata(DOSCM), data=SDCTCOW)
library(aod)
wald.test(b = coef(a), Sigma = vcov(a), Terms = 4:5)
Wald test:
----------
Chi-squared test:
X2 = 11.0, df = 2, P(> X2) = 0.004
Wald test for interaction is p < 0.05.
Model output with interaction
mm0 <- coxph(Surv(CMTAR, CM) ~ Tx*PrevCM + strata(Parity) + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW)
tidy(mm0)
According to this interaction model, the HR (referent = Blanket Tx, no prev CM) for each group are:
Blanket:Prev0 = 1
Blanket:Prev1 = 2.1
Culture:PrevCM0 = 0.93
Culture:PrevCM1 = 0.97
Algorithm:PrevCM0 = 0.93
Algorithm:PrevCM1 = 1.14
This is a counter-intuitive result: hazards of clinical mastitis are higher in blanket cows with previous history of CM.
However, these effect estimates have wide confidence intervals, and this interaction model adds unnecessary complexity and does not change our conclusions.
Decision: Report main effects.
The order for removing covariates will be in increasing likelihood of being a confounder, which is based on my knowledge about the variables and their distribution in treatment groups.
This order will be: DOMYcat, DOSCM, Parity, PrevCM
Step 6a: Full model
mm0 <- tidy(coxph(Surv(CMTAR, CM) ~ strata(Parity) + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)) %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
Step 6b: removing DOMYcat
mm0 <- tidy(coxph(Surv(CMTAR, CM) ~ strata(Parity) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW))%>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
Both HR (Culture and Algorithm) changed by <10%. DOMYcat stays out.
Step 6c: removing DOSCM
mm0 <- tidy(coxph(Surv(CMTAR, CM) ~ strata(Parity) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)) %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
Both HR changed by <10%. DOMYcat stays out.
Step 6d: removing Parity
mm0 <- tidy(coxph(Surv(CMTAR, CM) ~ PrevCM + Tx + cluster(FARMID), data=SDCTCOW)) %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
Both HR changed by <10%. Parity stays out.
Step 6e: removing PrevCM
mm0 <- tidy(coxph(Surv(CMTAR, CM) ~ Tx + cluster(FARMID), data=SDCTCOW)) %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
Both HR changed by <10%. PrevCM stays out.
No covariates included, as no evidence for confounding.
CoxCM <- tidy(coxph(Surv(CMTAR, CM) ~ Tx + cluster(FARMID), data=SDCTCOW))
CoxCM$HR <- exp(CoxCM$estimate)
CoxCM$LCL <- exp(CoxCM$conf.low)
CoxCM$UCL <- exp(CoxCM$conf.high)
CoxCM <- CoxCM %>% select(term,HR,LCL,UCL,robust.se,p.value)
CoxCM
SR <- coxph(Surv(CMTAR, CM) ~ Tx + cluster(FARMID), data=SDCTCOW)
cox.zph(SR)
rho chisq p
TxCulture 0.12471 2.077891 0.149
TxAlgorithm -0.00225 0.000429 0.983
GLOBAL NA 2.596282 0.273
SR <- cox.zph(SR)
ggcoxzph(SR,var = c("TxCulture","TxAlgorithm"))
Sufficient evidence to use Cox model.
CoxCM <- coxph(Surv(CMTAR, CM) ~ Tx + Parity + cluster(FARMID), data=SDCTCOW) %>% tidy()
CoxCM$HR <- exp(CoxCM$estimate)
CoxCM$LCL <- exp(CoxCM$conf.low)
CoxCM$UCL <- exp(CoxCM$conf.high)
CoxCM <- CoxCM %>% select(term,HR,LCL,UCL,robust.se,p.value)
CoxCM
Estimates are very similar to those found using 10% rule.
Kaplan Meier curve
KM <- survfit(Surv(Cull2TAR, Cull2) ~ Tx, data = SDCTCOW)
ggsurvplot(KM, data = SDCTCOW, title = "", pval = T, conf.int = F,risk.table.col = "Tx",risk.table = T, risk.table.y.text.col = TRUE , surv.plot.height = 5, legend.labs = c("Blanket","Culture","Algorithm"), tables.theme = theme_cleantable(), ggtheme = theme_bw())
Log-Rank test
survdiff(Surv(Cull2TAR, Cull2) ~ Tx,data=SDCTCOW,rho=0)
Call:
survdiff(formula = Surv(Cull2TAR, Cull2) ~ Tx, data = SDCTCOW,
rho = 0)
N Observed Expected (O-E)^2/E (O-E)^2/V
Tx=Blanket 407 44 42.1 0.0891 0.134
Tx=Culture 410 40 43.0 0.2128 0.323
Tx=Algorithm 394 42 40.9 0.0290 0.043
Chisq= 0.3 on 2 degrees of freedom, p= 0.8
Model type: Cox proportional hazards analysis with farm included as a cluster variable to account for lack of indepedence.
Step 1: Identify potential confouders using a directed acyclic graph (DAG)
Step 2: Identify correlated variables using pearson and kendalls correlation coefficients
Step 3: Create model with all potential confounders
Step 4: Investigate if covariates meet proportional hazards assumption
Step 5: Investigate potential effect measure modification
Step 6: Remove unneccesary covariates in backwards stepwise fashion using 10% rule (i.e. if HR changes by >10% after removal of covariate, the covariate is retained in the model)
Step 7: Report final model
library(DiagrammeR)
mermaid("graph LR
T(Treatment)-->U(Culling or Death)
A(Age)-->T
P(Parity)-->T
M(Yield at dry-off)-->T
S(SCC during prev lactation)-->T
C(CM in prev lact)-->T
A-->U
P-->U
M-->U
S-->U
C-->U
C-->M
P-->C
P-->S
P-->M
A-->P
A-->C
A-->S
A-->M
M-->S
C-->S
style A fill:#FFFFFF, stroke-width:0px
style T fill:#FFFFFF, stroke-width:2px
style P fill:#FFFFFF, stroke-width:0px
style M fill:#FFFFFF, stroke-width:0px
style S fill:#FFFFFF, stroke-width:0px
style C fill:#FFFFFF, stroke-width:0px
style I fill:#FFFFFF, stroke-width:0px
style U fill:#FFFFFF, stroke-width:2px
")
According to this DAG, I may need to control for the following variables:
Parity [“Parity”]. Will not offer Age [“Age”] as it is highly correlated.
Yield at most recent test before dry off [“DOMY”]
Somatic cell count at last herd test during previous lactation [“DOSCC”] <- will not offer PrevSCCHI as it was correlated with DOSCC
Clinical mastitis in previous lactation [“PrevCM”]
Cows that did not calve are excluded (left censored)
Reason for R censor = 120 DIM
library(broom)
library(survival)
coxph(Surv(Cull2TAR, Cull2) ~ Parity + DOMY + DOSCC + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
Call:
coxph(formula = Surv(Cull2TAR, Cull2) ~ Parity + DOMY + DOSCC +
PrevCM + Tx + cluster(FARMID), data = SDCTCOW)
coef exp(coef) se(coef) robust se z p
Parity2 0.39524 1.48475 0.25866 0.21257 1.859 0.06298
Parity3 1.13923 3.12436 0.23351 0.23115 4.929 8.29e-07
DOMY -0.01446 0.98564 0.01107 0.01471 -0.983 0.32560
DOSCC 0.08981 1.09396 0.08514 0.05241 1.714 0.08660
PrevCM1 0.26788 1.30719 0.22819 0.09322 2.874 0.00406
TxCulture -0.09850 0.90619 0.21940 0.21233 -0.464 0.64271
TxAlgorithm 0.01074 1.01079 0.21583 0.23312 0.046 0.96327
Likelihood ratio test=44.51 on 7 df, p=1.706e-07
n= 1211, number of events= 126
Both DOSCC and DOMY don’t meet the assumption of proportional hazards
SR <- cox.zph(coxph(Surv(Cull2TAR, Cull2) ~ Parity + DOMY + DOSCC + PrevCM + Tx + cluster(FARMID), data=SDCTCOW))
SR
rho chisq p
Parity2 -0.1157 2.607 0.10639
Parity3 -0.0343 0.209 0.64753
DOMY 0.1521 7.402 0.00652
DOSCC 0.1462 5.471 0.01934
PrevCM1 -0.1220 2.530 0.11172
TxCulture 0.1222 3.313 0.06872
TxAlgorithm 0.0815 1.396 0.23741
GLOBAL NA 8.182 0.31686
Schoenfield residual plots
ggcoxzph(SR,var = c("TxCulture","TxAlgorithm"))
DOSCC and DOMY don’t meet the assumption of proportional hazards
ggcoxzph(SR,var = c("DOSCC", "DOMY"))
ggcoxzph(SR,var = c("Parity2","Parity3"))
ggcoxzph(SR,var = c("PrevCM1"))
Refit cox model with seperate baselines (stratified) for DOMY and DOSCC.
No evidence of other time-varying covariates or predictors (Tx).
SR <- coxph(Surv(Cull2TAR, Cull2) ~ Parity + strata(DOMYcat) + strata(DOSCM) + PrevCM + Tx + cluster(FARMID), data=SDCTCOW)
SR <- cox.zph(SR)
SR
rho chisq p
Parity2 -0.05884 0.412749 0.521
Parity3 -0.00113 0.000217 0.988
PrevCM1 -0.06962 0.500950 0.479
TxCulture 0.10086 1.689182 0.194
TxAlgorithm 0.07741 1.091294 0.296
GLOBAL NA 2.214228 0.819
Tx : FARMID - could not assess this using Cox regression as model did not converge or reported unusually large SEs for some interaction terms.
SR <- coxph(Surv(Cull2TAR, Cull2) ~ Tx*FARMID + Parity + PrevCM + Parity + strata(DOMYcat) + strata(DOSCM), data=SDCTCOW)
Loglik converged before variable 20 ; coefficient may be infinite.
tidy(SR)
I will assess effect Tx:farm interaction using logistic regression:
Wald test for interaction term was P > 0.05.
mm0 <- glm(CM ~ Tx*FARMID + Parity + PrevCM + Parity + DOMYcat + DOSCM, data=SDCTCOW)
car::Anova(mm0)
Analysis of Deviance Table (Type II tests)
Response: CM
LR Chisq Df Pr(>Chisq)
Tx 0.924 2 0.6300
FARMID 33.573 6 8.132e-06 ***
Parity 1.953 2 0.3767
PrevCM 1.039 1 0.3082
DOMYcat 1.573 1 0.2097
DOSCM 0.810 1 0.3683
Tx:FARMID 10.174 12 0.6007
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Assess for other interactions using Wald testfrom Cox regression.
Wald test was P > 0.05 for Tx:PrevCM interaction .
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx*PrevCM + Parity + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW)
summary(mm0)
Call:
coxph(formula = Surv(Cull2TAR, Cull2) ~ Tx * PrevCM + Parity +
strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data = SDCTCOW)
n= 1211, number of events= 126
coef exp(coef) se(coef) robust se z Pr(>|z|)
TxCulture -0.02411 0.97618 0.24667 0.23595 -0.102 0.9186
TxAlgorithm 0.05495 1.05649 0.24266 0.27674 0.199 0.8426
PrevCM1 0.49435 1.63943 0.36416 0.20400 2.423 0.0154 *
Parity2 0.41575 1.51550 0.25731 0.21398 1.943 0.0520 .
Parity3 1.17829 3.24881 0.23075 0.25675 4.589 4.45e-06 ***
TxCulture:PrevCM1 -0.40970 0.66385 0.53890 0.33877 -1.209 0.2265
TxAlgorithm:PrevCM1 -0.21937 0.80303 0.53502 0.70719 -0.310 0.7564
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TxCulture 0.9762 1.0244 0.6147 1.550
TxAlgorithm 1.0565 0.9465 0.6142 1.817
PrevCM1 1.6394 0.6100 1.0991 2.445
Parity2 1.5155 0.6598 0.9964 2.305
Parity3 3.2488 0.3078 1.9642 5.374
TxCulture:PrevCM1 0.6638 1.5064 0.3417 1.290
TxAlgorithm:PrevCM1 0.8030 1.2453 0.2008 3.211
Concordance= 0.64 (se = 0.03 )
Likelihood ratio test= 33.79 on 7 df, p=2e-05
Wald test = 1281 on 7 df, p=<2e-16
Score (logrank) test = 36.55 on 7 df, p=6e-06, Robust = 7 p=0.4
(Note: the likelihood ratio and score tests assume independence of
observations within a cluster, the Wald and robust score tests do not).
wald.test(b = coef(mm0), Sigma = vcov(mm0), Terms = 6:7)
Wald test:
----------
Chi-squared test:
X2 = 1.6, df = 2, P(> X2) = 0.46
Wald test was P < 0.05 for Tx:Parity interaction .
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx*Parity + PrevCM + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW)
summary(mm0)
Call:
coxph(formula = Surv(Cull2TAR, Cull2) ~ Tx * Parity + PrevCM +
strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data = SDCTCOW)
n= 1211, number of events= 126
coef exp(coef) se(coef) robust se z Pr(>|z|)
TxCulture 0.5382 1.7130 0.5005 0.4124 1.305 0.191869
TxAlgorithm 0.5758 1.7786 0.5080 0.5537 1.040 0.298327
Parity2 0.8960 2.4498 0.4961 0.4546 1.971 0.048749 *
Parity3 1.7523 5.7680 0.4572 0.4022 4.357 1.32e-05 ***
PrevCM1 0.3018 1.3522 0.2277 0.1047 2.882 0.003946 **
TxCulture:Parity2 -0.6590 0.5173 0.6640 0.7136 -0.924 0.355691
TxAlgorithm:Parity2 -0.6252 0.5352 0.6531 0.6359 -0.983 0.325518
TxCulture:Parity3 -0.8959 0.4082 0.5861 0.2552 -3.511 0.000446 ***
TxAlgorithm:Parity3 -0.7392 0.4775 0.5900 0.4252 -1.739 0.082111 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TxCulture 1.7130 0.5838 0.7633 3.8440
TxAlgorithm 1.7786 0.5622 0.6009 5.2648
Parity2 2.4498 0.4082 1.0049 5.9721
Parity3 5.7680 0.1734 2.6224 12.6871
PrevCM1 1.3522 0.7395 1.1014 1.6602
TxCulture:Parity2 0.5173 1.9329 0.1278 2.0949
TxAlgorithm:Parity2 0.5352 1.8686 0.1539 1.8610
TxCulture:Parity3 0.4082 2.4496 0.2476 0.6731
TxAlgorithm:Parity3 0.4775 2.0943 0.2075 1.0987
Concordance= 0.651 (se = 0.025 )
Likelihood ratio test= 35.95 on 9 df, p=4e-05
Wald test = 89593 on 9 df, p=<2e-16
Score (logrank) test = 38.57 on 9 df, p=1e-05, Robust = 7 p=0.6
(Note: the likelihood ratio and score tests assume independence of
observations within a cluster, the Wald and robust score tests do not).
wald.test(b = coef(mm0), Sigma = vcov(mm0), Terms = 6:9)
Wald test:
----------
Chi-squared test:
X2 = 658.3, df = 4, P(> X2) = 0.0
Investigate potential effect measure modification by doing stratified models
Parity = 1 Frequency table
table(SDCTCOW$Tx[SDCTCOW$Parity==1],SDCTCOW$Cull2[SDCTCOW$Parity==1])
0 1
Blanket 152 6
Culture 173 12
Algorithm 156 11
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + PrevCM + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW[SDCTCOW$Parity==1,]) %>% tidy()
mm0$HR <- exp(mm0$estimate)
mm0$LCL <- exp(mm0$conf.low)
mm0$UCL <- exp(mm0$conf.high)
mm0 %>% select("term","HR","LCL","UCL")
Frequency table
table(SDCTCOW$Tx[SDCTCOW$Parity==2],SDCTCOW$Cull2[SDCTCOW$Parity==2])
0 1
Blanket 120 13
Culture 99 9
Algorithm 112 11
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + PrevCM + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW[SDCTCOW$Parity==2,]) %>% tidy()
mm0$HR <- exp(mm0$estimate)
mm0$LCL <- exp(mm0$conf.low)
mm0$UCL <- exp(mm0$conf.high)
mm0 %>% select("term","HR","LCL","UCL")
Frequency table
table(SDCTCOW$Tx[SDCTCOW$Parity==3],SDCTCOW$Cull2[SDCTCOW$Parity==3])
0 1
Blanket 91 25
Culture 98 19
Algorithm 84 20
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + PrevCM + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW[SDCTCOW$Parity==3,]) %>% tidy
mm0$HR <- exp(mm0$estimate)
mm0$LCL <- exp(mm0$conf.low)
mm0$UCL <- exp(mm0$conf.high)
mm0 %>% select("term","HR","LCL","UCL")
This finding suggests that potential culling risks associated with SDCT may be higher in parity 1 cows than in multiparous animals. However these HR estimates in all 3 parity groups are imprecise (wide confidence intervals) because of a small number of culling/death events. For example, in lactation 1 animals: 6 in BDCT, 11 in Algorithm and 12 in Culture. Therefore, this difference in effects across levels of parity may be due to chance alone, and not worth investigating too closely.
Decision: Report main effects.
The order for removing covariates will be in increasing likelihood of being a confounder, which is based on my knowledge about the variables and their distribution in treatment groups.
This order will be: DOMYcat, DOSCM, PrevCM, Parity
Step 6a: Full model
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + PrevCM + Parity + strata(DOMYcat) + strata(DOSCM) + cluster(FARMID), data=SDCTCOW) %>% tidy() %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
Step 6b: removing DOMYcat
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + PrevCM + Parity + strata(DOSCM) + cluster(FARMID), data=SDCTCOW) %>% tidy() %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
HR for culture and algorithm didn’t change by >10%. DOMYcat stays out.
Step 6c: removing DOSCM
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + PrevCM + Parity + cluster(FARMID), data=SDCTCOW) %>% tidy() %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
HR changed by <10%. DOSCM stays out.
Step 6d: removing PrevCM
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + Parity + cluster(FARMID), data=SDCTCOW) %>% tidy() %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
HR changed by <10%. Leave PrevCM out.
Step 6e: removing Parity
mm0 <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + cluster(FARMID), data=SDCTCOW) %>% tidy() %>% select("term","estimate")
mm0$HR <- exp(mm0$estimate)
mm0
HR changed by <10%. Parity stays out.
No covariates included, as no evidence for confounding.
CoxCull <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + cluster(FARMID), data=SDCTCOW) %>% tidy()
CoxCull$HR <- exp(CoxCull$estimate)
CoxCull$LCL <- exp(CoxCull$conf.low)
CoxCull$UCL <- exp(CoxCull$conf.high)
CoxCull <- CoxCull %>% select(term,HR,LCL,UCL,robust.se,p.value)
CoxCull
SR <- coxph(Surv(Cull2TAR, Cull2) ~ Tx + cluster(FARMID), data=SDCTCOW)
cox.zph(SR)
rho chisq p
TxCulture 0.1100 1.52 0.217
TxAlgorithm 0.0951 1.12 0.290
GLOBAL NA 1.53 0.466
SR <- cox.zph(SR)
ggcoxzph(SR,var = c("TxCulture","TxAlgorithm"))
CoxCull <- coxph(Surv(CullTAR, Cull2) ~ Tx + Parity + DOSCC + PrevCM + cluster(FARMID), data=SDCTCOW) %>% tidy()
CoxCull$HR <- exp(CoxCull$estimate)
CoxCull$LCL <- exp(CoxCull$conf.low)
CoxCull$UCL <- exp(CoxCull$conf.high)
CoxCull <- CoxCull %>% select(term,HR,LCL,UCL,robust.se,p.value)
CoxCull
Estimates are similar to 10% rule based approach
Dataset example
SDCTCOWDHI$Tx <-
factor(SDCTCOWDHI$Tx,
levels=c(0,1,2),
labels=c("Blanket",
"Culture",
"Algorithm"))
DHIcheck <- SDCTCOWDHI %>% select(Tx,CowID,FARMID,DIM,TestDIMcat20,LSCC,MY,Parity,PrevCM,DOMY,DOSCC)
head(DHIcheck, n=10)
DHIoutcome <- SDCTCOWDHI %>% select(SCC,MY,SCM)
DHIoutcome$logSCC <- log(DHIoutcome$SCC + 1)
#summarytools::dfSummary(DHIoutcome, style='grid')
print(summarytools::dfSummary(DHIoutcome, valid.col=FALSE, graph.magnif=0.8, style="grid"), method = "render")
| No | Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | SCC [numeric] | Mean (sd) : 184.4 (678.2) min < med < max: 0 < 39 < 9999 IQR (CV) : 89 (3.7) | 608 distinct values | 3 (0.08%) | |||||||||||||
| 2 | MY [numeric] | Mean (sd) : 49.1 (10.5) min < med < max: 0.9 < 49.4 < 93.4 IQR (CV) : 13.2 (0.2) | 149 distinct values | 2 (0.05%) | |||||||||||||
| 3 | SCM [numeric] | Min : 0 Mean : 0.1 Max : 1 |
|
3 (0.08%) | |||||||||||||
| 4 | logSCC [numeric] | Mean (sd) : 3.9 (1.4) min < med < max: 0 < 3.7 < 9.2 IQR (CV) : 1.7 (0.3) | 608 distinct values | 3 (0.08%) |
Generated by summarytools 0.9.3 (R version 3.6.0)
2019-07-02
NA
Very little missing data. Will do complete case analysis.
Somatic cell count (log x 10,000 cells)
qqPlot(log(SDCTCOWDHI$SCC+1))
hist(log(SDCTCOWDHI$SCC+1))
Milk yield (kg)
qqPlot(SDCTCOWDHI$MY)
hist(SDCTCOWDHI$MY)
Model type: Linear mixed models, random intercepts for farm and cow will be fitted to account for repeated measures within cows, and clustering of cows within herds
Step 1: Identify potential confouders using a directed acyclic graph (DAG)
Step 2: Identify correlated variables using pearson and kendalls correlation coefficients
Step 3: Create model with all potential confounders
Step 4: Investigate potential effect measure modification
Step 5: Remove unneccesary covariates in backwards stepwise fashion using 10% rule (i.e. if estimated difference in SCC changes by >10% after removing the covariate, the covariate is retained in the model)
Step 6: Report final model
Step 7: Model diagnostics
library(DiagrammeR)
mermaid("graph LR
T(Treatment)-->U(SCC)
A(Age)-->T
P(Parity)-->T
M(Yield at dry-off)-->T
S(SCC during prev lactation)-->T
C(CM in prev lact)-->T
D(Days in milk at test)-->U
D-->T
A-->U
P-->U
M-->U
S-->U
C-->U
C-->M
P-->C
P-->S
P-->M
A-->P
A-->C
A-->S
A-->M
M-->S
C-->S
style A fill:#FFFFFF, stroke-width:0px
style T fill:#FFFFFF, stroke-width:2px
style P fill:#FFFFFF, stroke-width:0px
style M fill:#FFFFFF, stroke-width:0px
style S fill:#FFFFFF, stroke-width:0px
style C fill:#FFFFFF, stroke-width:0px
style I fill:#FFFFFF, stroke-width:0px
style D fill:#FFFFFF, stroke-width:0px
style U fill:#FFFFFF, stroke-width:2px
")
According to this DAG, I may need to control for the following variables:
Parity [“Parity”] <- Age not offered as highly correlated
Yield at most recent test before dry off [“DOMY”]
Somatic cell count at last herd test during previous lactation [“DOSCC”] <- PrevSCCHI not offered as correlated
Clinical mastitis in previous lactation [“PrevCM”]
Days in milk at herd test (category, 0-20 “10”, 21-40 “30” etc) [“TestDIMcat20”]
mm0 <- lmer(LSCC ~ Tx + TestDIMcat20 + Parity + DOSCC + DOMY + PrevCM + (1|FARMID/CowID), data=SDCTCOWDHI)
summary(mm0)
Linear mixed model fit by REML ['lmerMod']
Formula: LSCC ~ Tx + TestDIMcat20 + Parity + DOSCC + DOMY + PrevCM + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 13110.3
Scaled residuals:
Min 1Q Median 3Q Max
-5.5975 -0.4985 -0.1040 0.4340 3.9735
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 0.61791 0.7861
FARMID (Intercept) 0.02027 0.1424
Residual 1.14078 1.0681
Number of obs: 3989, groups: CowID:FARMID, 1178; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.338254 0.204372 16.334
TxCulture 0.047878 0.070138 0.683
TxAlgorithm 0.074745 0.070822 1.055
TestDIMcat2030 -0.576961 0.063393 -9.101
TestDIMcat2050 -0.557223 0.060950 -9.142
TestDIMcat2070 -0.394721 0.059969 -6.582
TestDIMcat2090 -0.272158 0.065089 -4.181
TestDIMcat20110 -0.084757 0.060209 -1.408
Parity2 0.110743 0.070611 1.568
Parity3 0.259957 0.074986 3.467
DOSCC 0.139922 0.028350 4.935
DOMY 0.002402 0.003983 0.603
PrevCM1 0.361076 0.087707 4.117
Correlation matrix not shown by default, as p = 13 > 12.
Use print(x, correlation=TRUE) or
vcov(x) if you need it
car::vif(mm0)
GVIF Df GVIF^(1/(2*Df))
Tx 1.009521 2 1.002372
TestDIMcat20 1.003238 5 1.000323
Parity 1.141466 2 1.033631
DOSCC 1.319455 1 1.148676
DOMY 1.167395 1 1.080461
PrevCM 1.050427 1 1.024903
Tx:FARM (P > 0.05)
mm0 <- lmer(LSCC ~ Tx*FARMID + Parity + TestDIMcat20 + DOSCC + DOMY + PrevCM + (1|CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: LSCC
Chisq Df Pr(>Chisq)
Tx 0.3270 2 0.849151
FARMID 20.8837 6 0.001925 **
Parity 10.7805 2 0.004561 **
TestDIMcat20 153.2866 5 < 2.2e-16 ***
DOSCC 23.5900 1 1.192e-06 ***
DOMY 0.1511 1 0.697525
PrevCM 16.3381 1 5.299e-05 ***
Tx:FARMID 13.6785 12 0.321709
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tx:DIM category at herd test (P > 0.05)
mm0 <- lmer(LSCC ~ Tx*TestDIMcat20 + Parity + DOSCC + DOMY + PrevCM + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: LSCC
Chisq Df Pr(>Chisq)
Tx 1.1452 2 0.56405
TestDIMcat20 155.5912 5 < 2.2e-16 ***
Parity 12.0313 2 0.00244 **
DOSCC 24.5071 1 7.404e-07 ***
DOMY 0.3514 1 0.55332
PrevCM 16.9005 1 3.939e-05 ***
Tx:TestDIMcat20 3.6320 10 0.96242
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tx:Parity (P > 0.05)
mm0 <- lmer(LSCC ~ Tx*Parity + TestDIMcat20 + DOSCC + DOMY + PrevCM + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: LSCC
Chisq Df Pr(>Chisq)
Tx 1.1474 2 0.563433
Parity 12.0289 2 0.002443 **
TestDIMcat20 155.4765 5 < 2.2e-16 ***
DOSCC 25.0949 1 5.458e-07 ***
DOMY 0.4871 1 0.485238
PrevCM 16.3909 1 5.153e-05 ***
Tx:Parity 5.3325 4 0.254853
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tx:PrevCM (P > 0.05)
mm0 <- lmer(LSCC ~ Tx*PrevCM + Parity + TestDIMcat20 + DOSCC + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: LSCC
Chisq Df Pr(>Chisq)
Tx 1.1470 2 0.563549
PrevCM 16.9457 1 3.846e-05 ***
Parity 12.3024 2 0.002131 **
TestDIMcat20 155.7581 5 < 2.2e-16 ***
DOSCC 23.9345 1 9.967e-07 ***
DOMY 0.2841 1 0.594020
Tx:PrevCM 1.1795 2 0.554478
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tx:DOMY (P > 0.05)
mm0 <- lmer(LSCC ~ Tx*DOMY + PrevCM + Parity + TestDIMcat20 + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: LSCC
Chisq Df Pr(>Chisq)
Tx 1.1481 2 0.56324
DOMY 0.3580 1 0.54964
PrevCM 17.4500 1 2.95e-05 ***
Parity 11.7208 2 0.00285 **
TestDIMcat20 155.5513 5 < 2.2e-16 ***
DOSCC 24.5607 1 7.20e-07 ***
Tx:DOMY 3.2738 2 0.19458
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
The order for removing covariates will be in increasing likelihood of being a confounder, which is based on my knowledge about the variables and their distribution in treatment groups.
This order will be: DOMY, Parity, PrevCM, DOSCC
TestDIMcat20 will not be removed.
Step 5a: Full model
mm0 <- lmer(LSCC ~ Tx + PrevCM + Parity + TestDIMcat20 + DOSCC + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy()
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Step 5b: removing DOMY
mm0 <- lmer(LSCC ~ Tx + PrevCM + Parity + TestDIMcat20 + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0%>% tidy()
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by <10%. DOMY stays out.
Step 5b: removing Parity
mm0 <- lmer(LSCC ~ Tx + PrevCM + TestDIMcat20 + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy()
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by <10%. Parity stays out.
Step 5c: removing PrevCM
mm0 <- lmer(LSCC ~ Tx + TestDIMcat20 + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy()
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by >10%. PrevCM stays in.
Step 5d: removing DOSCC
mm0 <- lmer(LSCC ~ Tx + PrevCM + TestDIMcat20 + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy()
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by > 10%. DOSCC stays in.
mm0 <- lmer(LSCC ~ Tx + PrevCM + DOSCC + TestDIMcat20 + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy(conf.int=TRUE)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
mm0 <- lmer(LSCC ~ 1 + (1|FARMID/CowID), data=SDCTCOWDHI)
summary(mm0)
Linear mixed model fit by REML ['lmerMod']
Formula: LSCC ~ 1 + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 13293.7
Scaled residuals:
Min 1Q Median 3Q Max
-5.0159 -0.5107 -0.1062 0.4452 4.0999
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 0.65871 0.8116
FARMID (Intercept) 0.03917 0.1979
Residual 1.20343 1.0970
Number of obs: 3989, groups: CowID:FARMID, 1178; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.86908 0.08308 46.57
ICC (CowID) = 0.35 ICC (FARMID) = 0.02
Most clustering is happening within cow, which is not a suprise given this is longitudinal data.
lmer(LSCC ~ Tx + Parity + PrevCM + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: LSCC ~ Tx + Parity + PrevCM + DOSCC + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 13233.2
Scaled residuals:
Min 1Q Median 3Q Max
-5.1671 -0.5100 -0.1122 0.4512 4.1458
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 0.59501 0.7714
FARMID (Intercept) 0.01825 0.1351
Residual 1.20377 1.0972
Number of obs: 3989, groups: CowID:FARMID, 1178; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.12018 0.13160 23.710
TxCulture 0.04063 0.06986 0.582
TxAlgorithm 0.06227 0.07063 0.882
Parity2 0.11211 0.07042 1.592
Parity3 0.25300 0.07477 3.384
PrevCM1 0.37041 0.08730 4.243
DOSCC 0.13267 0.02635 5.035
Correlation of Fixed Effects:
(Intr) TxCltr TxAlgr Party2 Party3 PrvCM1
TxCulture -0.298
TxAlgorithm -0.270 0.498
Parity2 -0.042 0.054 0.021
Parity3 0.038 0.012 0.019 0.439
PrevCM1 0.069 -0.019 0.013 -0.044 -0.100
DOSCC -0.786 0.026 0.000 -0.223 -0.295 -0.143
lmer(LSCC ~ Tx + Parity + PrevCM + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI) %>% car::Anova() %>% tidy()
NA
Estimates are very similar to 10% rule based approach.
Geometric mean SCC
mm0 <- lmer(LSCC ~ Tx + PrevCM + DOSCC + TestDIMcat20 + (1|FARMID/CowID), data=SDCTCOWDHI)
emm <- emmeans(mm0, ~Tx) %>% tidy()
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
emm
Geometric mean SCC
mm0 <- lmer(LSCC ~ Tx + PrevCM + DOSCC + TestDIMcat20 + (1|FARMID/CowID), data=SDCTCOWDHI)
emm <- emmeans(mm0, ~Tx) %>% tidy()
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
emm$SCC <- exp(emm$estimate)
emm$LCL <- exp(emm$asymp.LCL)
emm$UCL <- exp(emm$asymp.UCL)
emm <- emm %>% select(Tx,SCC,LCL,UCL)
emm
Reported as back-transformed estimated marginal means by herd test day, no interaction with test DIM
mm0 <- lmer(LSCC ~ Tx + TestDIMcat20 + DOSCC + PrevCM + (1|FARMID/CowID), data=SDCTCOWDHI)
atx <- c(10,30,50,70,90,110)
emm <- emmeans(mm0, ~Tx*TestDIMcat20, at=list(atx)) %>% tidy()
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
emm$SCC <- exp(emm$estimate)
emm$LCL <- exp(emm$asymp.LCL)
emm$UCL <- exp(emm$asymp.UCL)
emm <- emm %>% select(Tx,TestDIMcat20,SCC,LCL,UCL)
curve <- ggplot(emm) + coord_cartesian(ylim = (c(0,100))) + aes(x=TestDIMcat20, y=SCC, group=Tx, colour=Tx) + geom_point() + geom_line(aes(colour=Tx,linetype=Tx)) + geom_ribbon(aes(ymin=emm$LCL, ymax=emm$UCL,colour=Tx,fill=Tx), linetype=0, alpha=0.1)
curve
Reported as back-transformed estimated marginal means by herd test day, with interaction with test DIM
mm0 <- lmer(LSCC ~ Tx*TestDIMcat20 + PrevCM + DOSCC + (1|FARMID/CowID), data=SDCTCOWDHI)
atx <- c(10,30,50,70,90,110)
emm <- emmeans(mm0, ~Tx*TestDIMcat20, at=list(atx)) %>% tidy()
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3989) or larger,
but be warned that this may result in large computation time and memory use.
emm$SCC <- exp(emm$estimate)
emm$LCL <- exp(emm$asymp.LCL)
emm$UCL <- exp(emm$asymp.UCL)
emm <- emm %>% select(Tx,TestDIMcat20,SCC,LCL,UCL)
curve <- ggplot(emm) + coord_cartesian(ylim = (c(0,100))) + aes(x=TestDIMcat20, y=SCC, group=Tx, colour=Tx) + geom_point() + geom_line(aes(colour=Tx,linetype=Tx)) + geom_ribbon(aes(ymin=emm$LCL, ymax=emm$UCL,colour=Tx,fill=Tx), linetype=0, alpha=0.1)
curve
### Step 7: Model diagnostics Checking homoskedasticity assumption (variance of residuals)
mm0 <- lmer(LSCC ~ Tx + TestDIMcat20 + (1|FARMID/CowID), data=SDCTCOWDHI)
ggplot(data.frame(eta=predict(mm0,type="link"),pearson=residuals(mm0,type="pearson")),
aes(x=eta,y=pearson)) +
geom_point() +
theme_bw()
Checking normality of residuals
qqnorm(residuals(mm0))
Evidence of small L tail. Although some evidence of heteroskedascitity and not perfectly normal residuals, I believe these are allowable for this model.
Model type: Linear mixed models, random intercepts for farm and cow will be fitted to account for repeated measures within cows, and clustering of cows within herds
Step 1: Identify potential confouders using a directed acyclic graph (DAG)
Step 2: Identify correlated variables using pearson and kendalls correlation coefficients
Step 3: Create model with all potential confounders
Step 4: Investigate potential effect measure modification
Step 5: Remove unneccesary covariates in backwards stepwise fashion using 10% rule (i.e. if estimated difference in milk yield changes by >10% after removing the covariate, the covariate is retained in the model)
Step 6: Report final model
Step 7: Model diagnostics
library(DiagrammeR)
mermaid("graph LR
T(Treatment)-->U(Milk yield)
A(Age)-->T
P(Parity)-->T
M(Yield at dry-off)-->T
S(SCC during prev lactation)-->T
C(CM in prev lact)-->T
D(Days in milk at test)-->U
D-->T
A-->U
P-->U
M-->U
S-->U
C-->U
C-->M
P-->C
P-->S
P-->M
A-->P
A-->C
A-->S
A-->M
M-->S
C-->S
style A fill:#FFFFFF, stroke-width:0px
style T fill:#FFFFFF, stroke-width:2px
style P fill:#FFFFFF, stroke-width:0px
style M fill:#FFFFFF, stroke-width:0px
style S fill:#FFFFFF, stroke-width:0px
style C fill:#FFFFFF, stroke-width:0px
style I fill:#FFFFFF, stroke-width:0px
style D fill:#FFFFFF, stroke-width:0px
style U fill:#FFFFFF, stroke-width:2px
")
According to this DAG, I may need to control for the following variables:
Parity [“Parity”] <- Age not offered as highly correlated
Yield at most recent test before dry off [“DOMY”]
Somatic cell count at last herd test during previous lactation [“DOSCC”] <- PrevSCCHI not offered as correlated
Clinical mastitis in previous lactation [“PrevCM”]
Days in milk at herd test (category, 0-20 “10”, 21-40 “30” etc) [“TestDIMcat20”]
mm0 <- lmer(MY ~ Tx + TestDIMcat20 + Parity + DOSCC + PrevCM + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
summary(mm0)
Linear mixed model fit by REML ['lmerMod']
Formula: MY ~ Tx + TestDIMcat20 + Parity + DOSCC + PrevCM + DOMY + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 27948.5
Scaled residuals:
Min 1Q Median 3Q Max
-6.3698 -0.4620 0.0223 0.5283 3.9851
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 28.70 5.357
FARMID (Intercept) 9.63 3.103
Residual 46.18 6.795
Number of obs: 3990, groups: CowID:FARMID, 1177; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 30.22473 1.76719 17.103
TxCulture -0.04144 0.46718 -0.089
TxAlgorithm -0.92795 0.47146 -1.968
TestDIMcat2030 11.86182 0.40548 29.253
TestDIMcat2050 12.87916 0.38854 33.148
TestDIMcat2070 12.51208 0.38249 32.713
TestDIMcat2090 11.01316 0.41572 26.492
TestDIMcat20110 8.43431 0.38401 21.964
Parity2 2.52549 0.47112 5.361
Parity3 3.01851 0.50067 6.029
DOSCC 0.26481 0.19029 1.392
PrevCM1 -0.24232 0.58620 -0.413
DOMY 0.22068 0.02706 8.156
Correlation matrix not shown by default, as p = 13 > 12.
Use print(x, correlation=TRUE) or
vcov(x) if you need it
car::vif(mm0)
GVIF Df GVIF^(1/(2*Df))
Tx 1.009493 2 1.002365
TestDIMcat20 1.002987 5 1.000298
Parity 1.145893 2 1.034632
DOSCC 1.308164 1 1.143750
PrevCM 1.048252 1 1.023842
DOMY 1.154875 1 1.074651
mm0 <- lmer(MY ~ Tx*FARMID + Parity + TestDIMcat20 + DOSCC + DOMY + PrevCM + (1|CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: MY
Chisq Df Pr(>Chisq)
Tx 7.1919 2 0.02743 *
FARMID 383.1701 6 < 2.2e-16 ***
Parity 56.0941 2 6.597e-13 ***
TestDIMcat20 1502.4062 5 < 2.2e-16 ***
DOSCC 1.8384 1 0.17514
DOMY 73.6534 1 < 2.2e-16 ***
PrevCM 0.2286 1 0.63260
Tx:FARMID 21.1764 12 0.04786 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Significant interaction term.
Descision: Revisit this after covariates are finalized.
Tx:DIM category at herd test (P > 0.05)
mm0 <- lmer(MY ~ Tx*TestDIMcat20 + Parity + DOSCC + DOMY + PrevCM + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: MY
Chisq Df Pr(>Chisq)
Tx 4.9281 2 0.08509 .
TestDIMcat20 1563.6373 5 < 2.2e-16 ***
Parity 45.3091 2 1.450e-10 ***
DOSCC 1.8538 1 0.17335
DOMY 65.8173 1 4.947e-16 ***
PrevCM 0.1793 1 0.67194
Tx:TestDIMcat20 11.3951 10 0.32757
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tx:Parity (P > 0.05)
mm0 <- lmer(MY ~ Tx*Parity + TestDIMcat20 + DOSCC + DOMY + PrevCM + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: MY
Chisq Df Pr(>Chisq)
Tx 4.9461 2 0.08433 .
Parity 45.4964 2 1.32e-10 ***
TestDIMcat20 1563.4543 5 < 2.2e-16 ***
DOSCC 2.2956 1 0.12974
DOMY 68.0948 1 < 2.2e-16 ***
PrevCM 0.2483 1 0.61826
Tx:Parity 6.9870 4 0.13658
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Tx:PrevCM (P > 0.05)
mm0 <- lmer(MY ~ Tx*PrevCM + Parity + TestDIMcat20 + DOSCC + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: MY
Chisq Df Pr(>Chisq)
Tx 4.9345 2 0.08482 .
PrevCM 0.1709 1 0.67934
Parity 45.4975 2 1.319e-10 ***
TestDIMcat20 1561.6764 5 < 2.2e-16 ***
DOSCC 2.0081 1 0.15647
DOMY 67.3721 1 2.248e-16 ***
Tx:PrevCM 1.6370 2 0.44109
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
The order for removing covariates will be in increasing likelihood of being a confounder, which is based on my knowledge about the variables and their distribution in treatment groups.
This order will be: DOSCC, PrevCM, Parity, DOMY
TestDIMcat20 will not be removed (forced into model).
Step 5a: Full model
mm0 <- lmer(MY ~ Tx + PrevCM + Parity + TestDIMcat20 + DOSCC + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy() %>% select(term,estimate)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Step 5b: Removing DOSCC
mm0 <- lmer(MY ~ Tx + PrevCM + Parity + TestDIMcat20 + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy() %>% select(term,estimate)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by <10%. DOSCC stays out
Step 5c: removing PrevCM
mm0 <- lmer(MY ~ Tx + Parity + TestDIMcat20 + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy() %>% select(term,estimate)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by < 10%. PrevCM stays out.
Step 5d: removing Parity
mm0 <- lmer(MY ~ Tx + TestDIMcat20 + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy() %>% select(term,estimate)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by >10%. Parity stays in.
Step 5e: removing DOMY
mm0 <- lmer(MY ~ Tx + Parity + TestDIMcat20 + (1|FARMID/CowID), data=SDCTCOWDHI)
mm0 %>% tidy() %>% select(term,estimate)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
Changed by >10%. DOMY stays in.
Effect measure modification with herd (P < 0.05)
mm0 <- lmer(MY ~ Tx*FARMID + TestDIMcat20 + Parity + DOMY + (1|CowID), data=SDCTCOWDHI)
car::Anova(mm0)
Analysis of Deviance Table (Type II Wald chisquare tests)
Response: MY
Chisq Df Pr(>Chisq)
Tx 7.0355 2 0.02967 *
FARMID 409.5611 6 < 2.2e-16 ***
TestDIMcat20 1502.2612 5 < 2.2e-16 ***
Parity 70.6857 2 4.475e-16 ***
DOMY 74.5582 1 < 2.2e-16 ***
Tx:FARMID 21.1553 12 0.04815 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Decision - will investigate effect estimates stratified by herd
Model output
mm0 <- lmer(MY ~ Tx*FARMID + TestDIMcat20 + Parity + DOMY + (1|CowID), data=SDCTCOWDHI)
emmeans(mm0,pairwise ~ Tx | FARMID)
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
$emmeans
FARMID = 1:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 47.4 1.240 Inf 45.0 49.8
Culture 49.7 1.162 Inf 47.4 51.9
Algorithm 46.2 1.381 Inf 43.5 48.9
FARMID = 2:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 49.6 1.200 Inf 47.3 52.0
Culture 46.0 1.300 Inf 43.4 48.5
Algorithm 43.4 1.188 Inf 41.1 45.8
FARMID = 3:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 43.4 0.703 Inf 42.1 44.8
Culture 43.8 0.701 Inf 42.4 45.1
Algorithm 42.0 0.736 Inf 40.6 43.5
FARMID = 4:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 53.3 0.565 Inf 52.2 54.4
Culture 52.5 0.534 Inf 51.4 53.5
Algorithm 53.2 0.558 Inf 52.2 54.3
FARMID = 5:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 49.4 0.739 Inf 48.0 50.9
Culture 49.9 0.712 Inf 48.5 51.3
Algorithm 48.1 0.752 Inf 46.6 49.5
FARMID = 6:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 49.2 1.515 Inf 46.2 52.1
Culture 52.3 1.548 Inf 49.2 55.3
Algorithm 50.7 1.698 Inf 47.4 54.1
FARMID = 7:
Tx emmean SE df asymp.LCL asymp.UCL
Blanket 50.4 1.014 Inf 48.4 52.4
Culture 49.4 1.066 Inf 47.3 51.5
Algorithm 49.2 1.067 Inf 47.1 51.2
Results are averaged over the levels of: TestDIMcat20, Parity
Degrees-of-freedom method: asymptotic
Confidence level used: 0.95
$contrasts
FARMID = 1:
contrast estimate SE df z.ratio p.value
Blanket - Culture -2.2771 1.693 Inf -1.345 0.3704
Blanket - Algorithm 1.1734 1.848 Inf 0.635 0.8008
Culture - Algorithm 3.4506 1.800 Inf 1.917 0.1337
FARMID = 2:
contrast estimate SE df z.ratio p.value
Blanket - Culture 3.6627 1.767 Inf 2.072 0.0956
Blanket - Algorithm 6.2020 1.686 Inf 3.678 0.0007
Culture - Algorithm 2.5393 1.758 Inf 1.444 0.3182
FARMID = 3:
contrast estimate SE df z.ratio p.value
Blanket - Culture -0.3310 0.990 Inf -0.334 0.9402
Blanket - Algorithm 1.4086 1.015 Inf 1.388 0.3474
Culture - Algorithm 1.7396 1.015 Inf 1.713 0.2002
FARMID = 4:
contrast estimate SE df z.ratio p.value
Blanket - Culture 0.8679 0.770 Inf 1.128 0.4969
Blanket - Algorithm 0.0873 0.786 Inf 0.111 0.9932
Culture - Algorithm -0.7806 0.760 Inf -1.027 0.5600
FARMID = 5:
contrast estimate SE df z.ratio p.value
Blanket - Culture -0.5102 0.985 Inf -0.518 0.8627
Blanket - Algorithm 1.3592 1.015 Inf 1.339 0.3733
Culture - Algorithm 1.8695 1.003 Inf 1.863 0.1495
FARMID = 6:
contrast estimate SE df z.ratio p.value
Blanket - Culture -3.1044 2.164 Inf -1.435 0.3229
Blanket - Algorithm -1.5474 2.273 Inf -0.681 0.7747
Culture - Algorithm 1.5571 2.290 Inf 0.680 0.7753
FARMID = 7:
contrast estimate SE df z.ratio p.value
Blanket - Culture 0.9515 1.467 Inf 0.649 0.7931
Blanket - Algorithm 1.2379 1.468 Inf 0.843 0.6761
Culture - Algorithm 0.2864 1.503 Inf 0.191 0.9802
Results are averaged over the levels of: TestDIMcat20, Parity
P value adjustment: tukey method for comparing a family of 3 estimates
It appears that herd 2 (87 cows) and herd 6 (42 cows) have extreme results compared with the other herds. In herd 2, BDCT had much higher MY than SDCT cows. In herd 6, it was the opposite effect. Given these were the smallest herds in the study, I will report a pooled result.
Model output
summary(lmer(MY ~ Tx + TestDIMcat20 + Parity + DOMY+ (1|FARMID/CowID), data=SDCTCOWDHI))
Linear mixed model fit by REML ['lmerMod']
Formula: MY ~ Tx + TestDIMcat20 + Parity + DOMY + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 27949.8
Scaled residuals:
Min 1Q Median 3Q Max
-6.3374 -0.4572 0.0207 0.5284 3.9942
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 28.677 5.355
FARMID (Intercept) 9.946 3.154
Residual 46.184 6.796
Number of obs: 3990, groups: CowID:FARMID, 1177; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 31.61485 1.47112 21.490
TxCulture -0.04671 0.46702 -0.100
TxAlgorithm -0.92025 0.47129 -1.953
TestDIMcat2030 11.86555 0.40548 29.263
TestDIMcat2050 12.87773 0.38856 33.143
TestDIMcat2070 12.51168 0.38250 32.710
TestDIMcat2090 11.01773 0.41571 26.503
TestDIMcat20110 8.43423 0.38402 21.963
Parity2 2.66366 0.45780 5.818
Parity3 3.20325 0.47445 6.752
DOMY 0.20758 0.02539 8.175
Correlation of Fixed Effects:
(Intr) TxCltr TxAlgr TDIM203 TDIM205 TDIM207 TDIM209 TDIM201 Party2 Party3
TxCulture -0.132
TxAlgorithm -0.146 0.499
TstDIMc2030 -0.144 -0.013 -0.012
TstDIMc2050 -0.132 -0.010 -0.013 0.486
TstDIMc2070 -0.135 -0.010 -0.004 0.534 0.481
TstDIMc2090 -0.143 -0.005 -0.005 0.515 0.499 0.471
TstDIM20110 -0.129 -0.008 -0.004 0.514 0.507 0.527 0.465
Parity2 -0.167 0.055 0.021 0.000 0.008 -0.001 0.004 0.006
Parity3 -0.173 0.013 0.018 -0.014 0.009 -0.009 0.012 -0.001 0.399
DOMY -0.478 -0.065 -0.027 0.013 -0.002 -0.002 0.019 -0.014 0.066 0.101
Model output (tidy)
lmer(MY ~ Tx + TestDIMcat20 + Parity + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI) %>% tidy(conf.int=TRUE)
binding factor and character vector, coercing into character vectorbinding character and factor vector, coercing into character vector
mm0 <- lmer(MY ~ 1 + (1|FARMID/CowID), data=SDCTCOWDHI)
summary(mm0)
Linear mixed model fit by REML ['lmerMod']
Formula: MY ~ 1 + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 29310.2
Scaled residuals:
Min 1Q Median 3Q Max
-5.1211 -0.4867 0.0849 0.5681 3.5617
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 27.03 5.199
FARMID (Intercept) 10.06 3.172
Residual 70.88 8.419
Number of obs: 3990, groups: CowID:FARMID, 1177; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 48.286 1.226 39.38
ICC (CowID) = 0.25 ICC (FARMID) = 0.09
Emmeans without TestDIM interaction
mm0 <- lmer(MY ~ Tx + TestDIMcat20 + Parity + DOMY+ (1|FARMID/CowID), data=SDCTCOWDHI)
emmeans(mm0,~Tx) %>% tidy
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
Model output
summary(lmer(MY ~ Tx + TestDIMcat20 + Parity + (1|FARMID/CowID), data=SDCTCOWDHI))
Linear mixed model fit by REML ['lmerMod']
Formula: MY ~ Tx + TestDIMcat20 + Parity + (1 | FARMID/CowID)
Data: SDCTCOWDHI
REML criterion at convergence: 28009.3
Scaled residuals:
Min 1Q Median 3Q Max
-6.2353 -0.4645 0.0246 0.5246 3.9839
Random effects:
Groups Name Variance Std.Dev.
CowID:FARMID (Intercept) 31.11 5.578
FARMID (Intercept) 11.14 3.338
Residual 46.17 6.795
Number of obs: 3990, groups: CowID:FARMID, 1177; FARMID, 7
Fixed effects:
Estimate Std. Error t value
(Intercept) 37.3558 1.3606 27.456
TxCulture 0.2008 0.4792 0.419
TxAlgorithm -0.8169 0.4844 -1.687
TestDIMcat2030 11.8422 0.4064 29.139
TestDIMcat2050 12.8803 0.3890 33.113
TestDIMcat2070 12.5248 0.3830 32.704
TestDIMcat2090 10.9596 0.4165 26.315
TestDIMcat20110 8.4713 0.3844 22.040
Parity2 2.4166 0.4697 5.145
Parity3 2.8034 0.4853 5.777
Correlation of Fixed Effects:
(Intr) TxCltr TxAlgr TDIM203 TDIM205 TDIM207 TDIM209 TDIM201 Party2
TxCulture -0.182
TxAlgorithm -0.177 0.499
TstDIMc2030 -0.150 -0.012 -0.012
TstDIMc2050 -0.144 -0.010 -0.013 0.486
TstDIMc2070 -0.147 -0.010 -0.004 0.535 0.480
TstDIMc2090 -0.145 -0.004 -0.005 0.515 0.499 0.470
TstDIM20110 -0.147 -0.009 -0.005 0.515 0.507 0.528 0.464
Parity2 -0.151 0.059 0.022 -0.001 0.008 -0.001 0.003 0.006
Parity3 -0.139 0.019 0.021 -0.015 0.009 -0.008 0.010 0.000 0.395
mm0 <- lmer(MY ~ Tx + TestDIMcat20 + Parity + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
atx <- c(10,30,50,70,90,110)
emm <- emmeans(mm0, ~Tx*TestDIMcat20) %>% tidy()
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
emm$MY <- emm$estimate
emm$LCL <- emm$asymp.LCL
emm$UCL <- emm$asymp.UCL
emm %>% select(Tx,TestDIMcat20,MY,LCL,UCL)
Plotting model as predicted values
curve <- ggplot(emm) + coord_cartesian(ylim = (c(0,55))) + aes(x=TestDIMcat20, y=MY, group=Tx, colour=Tx) + geom_point() + geom_line(aes(colour=Tx,linetype=Tx)) + geom_ribbon(aes(ymin=emm$LCL, ymax=emm$UCL,colour=Tx,fill=Tx), linetype=0, alpha=0.1)
curve
Reported as estimated marginal means by test day category with interaction with herd test DIM
mm0 <- lmer(MY ~ Tx*TestDIMcat20 + Parity + DOMY + (1|FARMID/CowID), data=SDCTCOWDHI)
atx <- c(10,30,50,70,90,110)
emm <- emmeans(mm0, ~Tx*TestDIMcat20, at=list(atx)) %>% tidy()
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(pbkrtest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, set emm_options(lmerTest.limit = 3990) or larger,
but be warned that this may result in large computation time and memory use.
emm$MY <- emm$estimate
emm$LCL <- emm$asymp.LCL
emm$UCL <- emm$asymp.UCL
emm <- emm %>% select(Tx,TestDIMcat20,MY,LCL,UCL)
curve <- ggplot(emm) + coord_cartesian(ylim = (c(0,55))) + aes(x=TestDIMcat20, y=MY, group=Tx, colour=Tx) + geom_point() + geom_line(aes(colour=Tx,linetype=Tx)) + geom_ribbon(aes(ymin=emm$LCL, ymax=emm$UCL,colour=Tx,fill=Tx), linetype=0, alpha=0.1)
curve
Checking homoskedasticity assumption (variance of residuals)
mm0 <- lmer(MY ~ Tx + TestDIMcat20 + Parity +DOMY+ (1|FARMID/CowID), data=SDCTCOWDHI)
ggplot(data.frame(eta=predict(mm0,type="link"),pearson=residuals(mm0,type="pearson")),
aes(x=eta,y=pearson)) +
geom_point() +
theme_bw()
Checking normality of residuals
qqnorm(residuals(mm0))
I am happy with homoscedasticity and normal residuals assumptions